# CUDA toolkit release used to select the builder base image tag.
# Override with: --build-arg CUDA_VERSION=<x.y.z>
ARG CUDA_VERSION=12.8.1

# Builder stage: NVIDIA CUDA "devel" image on UBI 9, used for compilation.
FROM docker.io/nvidia/cuda:${CUDA_VERSION}-devel-ubi9 AS builder

# uv configuration for the build (presumably consumed by uv inside
# build-vllm.sh -- confirm against that script):
#   UV_PYTHON_INSTALL_DIR  where uv places managed Python installs; it lives
#                          under /opt, which the final stage copies
#   UV_HTTP_TIMEOUT        HTTP timeout, in seconds
#   UV_INDEX_STRATEGY      "unsafe-best-match" considers every configured
#                          index when resolving a package version
#   UV_LINK_MODE           "copy" copies files out of the cache instead of
#                          linking them
ENV UV_PYTHON_INSTALL_DIR="/opt/uv/python" \
    UV_HTTP_TIMEOUT=500 \
    UV_INDEX_STRATEGY="unsafe-best-match" \
    UV_LINK_MODE="copy"

# Copy the whole build context into the image and run the vLLM build script
# from the source root, selecting its "cuda" variant.
COPY . /src/ramalama
WORKDIR /src/ramalama
RUN container-images/scripts/build-vllm.sh "cuda"

# Reset the working directory once the build is done.
WORKDIR /

# Final runtime stage: start from the RamaLama CUDA image and layer the
# builder's output on top of it.
FROM quay.io/ramalama/cuda:latest

# Bring over the CUDA toolkit tree and everything under /opt from the builder
# (/opt contains UV_PYTHON_INSTALL_DIR, i.e. /opt/uv/python).
COPY --from=builder /usr/local/cuda/ /usr/local/cuda/
COPY --from=builder /opt /opt

# Install a C/C++ toolchain, numactl and ninja into the runtime image, then
# drop the dnf caches in the same layer.
# NOTE(review): presumably required for runtime (JIT) compilation by vLLM and
# its dependencies -- confirm before removing any of these.
RUN dnf install -y \
        gcc \
        gcc-c++ \
        numactl \
    && pip3.11 install --no-cache-dir ninja \
    && dnf clean all
